{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Requests"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#http://docs.python-requests.org/zh_CN/latest/user/quickstart.html\n",
    "import requests\n",
    "import json"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Get Request 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Get the CO value of Hefei.\n",
    "# The query goes through `params` so requests builds and encodes the URL,\n",
    "# and the timeout stops the cell from hanging if the server never answers.\n",
    "# NOTE(review): the API token is hardcoded here; consider reading it from an\n",
    "# environment variable before sharing the notebook.\n",
    "r = requests.get(\n",
    "    'http://www.pm25.in/api/querys/co.json',\n",
    "    params={'city': 'hefei', 'token': '5j1znBVAsnSf5xQyNQyq'},\n",
    "    timeout=10,\n",
    ")\n",
    "print(r.text)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Status Code (the HTTP status of the reply; 200 means success)\n",
    "print(r.status_code)\n",
    "# The type of response: r.text is the decoded body, i.e. a str\n",
    "print(type(r.text))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Use json to parse the response text into Python objects\n",
    "hjson = json.loads(r.text)\n",
    "\n",
    "# Type of hjson (the Python type the parser produced)\n",
    "type(hjson)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Pretty printing: serialize the parsed data back to indented text.\n",
    "# separators is (item separator, key separator), so ': ' sits between key and value.\n",
    "# ensure_ascii=False keeps non-ASCII characters readable instead of \\uXXXX escapes.\n",
    "js = json.dumps(hjson, sort_keys=True, indent=4, separators=(',', ': '), ensure_ascii=False)\n",
    "print(js)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# json.dumps returns text, so js is a str\n",
    "type(js)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Get Request 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import requests\n",
    "\n",
    "# Taobao IP address lookup API\n",
    "URL = 'http://ip.taobao.com/service/getIpInfo.php'\n",
    "try:\n",
    "    r = requests.get(URL, params={'ip': '202.38.64.1'}, timeout=10)\n",
    "    r.raise_for_status()  # turn 4xx/5xx replies into exceptions\n",
    "except requests.RequestException as err:\n",
    "    print(err)\n",
    "else:\n",
    "    # Reached only when the request succeeded\n",
    "    result = r.json()\n",
    "    print(type(result))\n",
    "    print(result)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Look up the 'code' field of the parsed JSON reply\n",
    "result['code']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Drill into the nested 'data' object and read its 'country' field\n",
    "result['data']['country']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Get Request 3"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Download images"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import requests\n",
    "\n",
    "def download_img(url_info):\n",
    "    \"\"\"Download one image and write it to disk.\n",
    "\n",
    "    Args:\n",
    "        url_info: Sequence ``(url, path)``. Nothing is downloaded when\n",
    "            ``path`` is empty.\n",
    "\n",
    "    Returns:\n",
    "        ``(True, path)`` on success; ``None`` when the download was\n",
    "        skipped or failed.\n",
    "    \"\"\"\n",
    "    if not url_info[1]:\n",
    "        return None\n",
    "    url = url_info[0]\n",
    "    print(\"-----------downloading image %s\" % url)\n",
    "    try:\n",
    "        # The timeout keeps an unresponsive server from blocking the cell forever.\n",
    "        response = requests.get(url, timeout=10)\n",
    "        # Without this check an HTTP error page would be saved as the image.\n",
    "        response.raise_for_status()\n",
    "\n",
    "        # Save Path\n",
    "        path = '%s' % (url_info[1])\n",
    "        with open(path, 'wb') as f:\n",
    "            f.write(response.content)\n",
    "        return (True, path)\n",
    "    except Exception as ex:\n",
    "        # Best effort: report the cause instead of hiding it, then signal failure.\n",
    "        print(\"--------Error----\", ex)\n",
    "        return None"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Download the image at this URL and save it as 'USTC.jpg' in the current working directory\n",
    "download_img(['http://vi.ustc.edu.cn/_upload/article/images/e5/4f/b1f558a04652bee07d23d4ce7e4c/W020110714574903022716.jpg','USTC.jpg'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import cv2\n",
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline\n",
    "\n",
    "# The download cell saved the file as 'USTC.jpg'; the name must match exactly\n",
    "# because file names are case-sensitive on Linux and most macOS/CI setups.\n",
    "img = cv2.imread('USTC.jpg')\n",
    "# cv2.imread returns None instead of raising when the file cannot be read.\n",
    "if img is None:\n",
    "    raise FileNotFoundError('USTC.jpg could not be read; run the download cell first')\n",
    "# OpenCV loads pixels as BGR, matplotlib expects RGB.\n",
    "img2 = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)\n",
    "plt.imshow(img2)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Get Request 4"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import requests\n",
    "import re\n",
    "# regular expression\n",
    "\n",
    "url = 'https://book.douban.com/'\n",
    "# Adjacent string literals are concatenated; the explicit trailing space keeps\n",
    "# 'Gecko)' and 'Chrome/' apart (a backslash continuation inside the literal\n",
    "# would glue them together and produce a malformed User-Agent).\n",
    "headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) '\n",
    "                         'Chrome/55.0.2883.87 Safari/537.36'}\n",
    "# The timeout keeps an unresponsive server from blocking the cell forever.\n",
    "html = requests.get(url, headers=headers, timeout=10)\n",
    "html.encoding = 'utf-8'\n",
    "\n",
    "# Capture groups: link (href), title (alt) and author; re.S lets '.' span line breaks.\n",
    "patter = re.compile('<li class.*?cover.*?href=\"(.*?)\".*?alt=\"(.*?)\".*?<p class=\"author\".*?>(.*?)</p>', re.S)\n",
    "titles = re.findall(patter, html.text)\n",
    "for each in titles:\n",
    "    print('书籍链接:{},书籍标题：{},---书籍作者：{}'.format(each[0], each[1],each[2].strip()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Practice: the first item in the output above looks wrong; find out why"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Post Request 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 'a=b' travels in the query string of the URL, while the dict passed as\n",
    "# `data` is form-encoded by requests into the body of the POST.\n",
    "url = 'http://httpbin.org/post?a=b'\n",
    "d = {'key1': 'value1', 'key2': 'value2'}\n",
    "r = requests.post(url, data=d)\n",
    "print(r.text)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# https://www.juhe.cn/docs/api/id/277\n",
    "# History Weather of Suzhou\n",
    "# NOTE(review): 'YourOwnKey' looks like a placeholder; presumably it must be\n",
    "# replaced with a personal juhe.cn API key before the request can succeed.\n",
    "url = 'http://v.juhe.cn/historyWeather/weather'\n",
    "d = {'key': 'YourOwnKey', 'city_id': '1157', 'weather_date': '2018-01-01'}\n",
    "r = requests.post(url, data=d)\n",
    "print(r.text)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# BeautifulSoup"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Example from official tutorial"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#https://www.crummy.com/software/BeautifulSoup/bs4/doc.zh/"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sample HTML document kept in a string; the BeautifulSoup cells parse it\n",
    "html_doc = \"\"\"\n",
    "<html><head><title>The Dormouse's story</title></head>\n",
    "<body>\n",
    "<p class=\"title\"><b>The Dormouse's story</b></p>\n",
    "\n",
    "<p class=\"story\">Once upon a time there were three little sisters; and their names were\n",
    "<a href=\"http://example.com/elsie\" class=\"sister\" id=\"link1\">Elsie</a>,\n",
    "<a href=\"http://example.com/lacie\" class=\"sister\" id=\"link2\">Lacie</a> and\n",
    "<a href=\"http://example.com/tillie\" class=\"sister\" id=\"link3\">Tillie</a>;\n",
    "and they lived at the bottom of a well.</p>\n",
    "\n",
    "<p class=\"story\">...</p>\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from bs4 import BeautifulSoup\n",
    "# Name the parser explicitly: otherwise bs4 picks whichever one is installed,\n",
    "# warns about it, and the parsed tree can differ between machines.\n",
    "soup = BeautifulSoup(html_doc, 'html.parser')\n",
    "\n",
    "# prettify() renders the parsed tree as indented markup\n",
    "print(soup.prettify())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# First <title> tag in the document\n",
    "soup.title"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Tag name of the <title> element\n",
    "soup.title.name"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Text contained in the <title> element\n",
    "soup.title.string"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Tag name of the element two levels above <title> in the tree\n",
    "soup.title.parent.parent.name"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# First <p> tag in the document\n",
    "soup.p"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Value of the class attribute on the first <p> tag\n",
    "soup.p['class']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# First <a> tag in the document\n",
    "soup.a"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Every <a> tag in the document\n",
    "soup.find_all('a')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# First element whose id attribute equals \"link3\"\n",
    "soup.find(id=\"link3\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Print the href attribute of every <a> tag (get() yields None when it is absent)\n",
    "for link in soup.find_all('a'):\n",
    "    print(link.get('href'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# All the text of the document with the markup stripped\n",
    "print(soup.get_text())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Another Example"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# https://blog.csdn.net/Shenpibaipao/article/details/80223002\n",
    "import requests\n",
    "from bs4 import BeautifulSoup\n",
    "pic_id = 0  # running index used in the output file names\n",
    "url = 'http://www.ivsky.com/bizhi/stand_by_me_doraemon_v45983/'\n",
    "bs = BeautifulSoup(requests.get(url).content, \"lxml\")\n",
    "# Every element with class 'il_img' is expected to wrap one <img> thumbnail\n",
    "for i in bs.select('.il_img'):\n",
    "    pic_url = i.find('img')['src']\n",
    "    # The with-block closes each file, so its data is flushed to disk\n",
    "    # before the following cells read the images back.\n",
    "    with open('./pic_'+str(pic_id)+'.jpg', 'wb') as pic_file:\n",
    "        pic_file.write(requests.get(pic_url).content)\n",
    "    pic_id += 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Take a look at the images\n",
    "import os\n",
    "import time\n",
    "import datetime\n",
    "\n",
    "dirlist = os.listdir(os.getcwd())\n",
    "for fi in dirlist:\n",
    "    # Only the files written by the download loop are of interest\n",
    "    if not fi.startswith('pic_'):\n",
    "        continue\n",
    "    statinfo = os.stat(fi)\n",
    "    # st_ctime: creation time on Windows, last metadata change on Unix\n",
    "    stamp = datetime.datetime.fromtimestamp(statinfo.st_ctime)\n",
    "    tt = stamp.strftime('%Y-%m-%d-%H-%M-%S-%f')\n",
    "    print(fi, tt)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# show 8 images\n",
    "import cv2\n",
    "import matplotlib\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "# 4 rows x 2 columns. The loop variables are named row/col so the response\n",
    "# object `r` created by the request cells is not overwritten.\n",
    "for row in range(4):\n",
    "    for col in range(2):\n",
    "        idx = row * 2 + col\n",
    "        fn = 'pic_' + str(idx) + '.jpg'\n",
    "        plt.subplot(4, 2, idx + 1)  # subplot positions are 1-based\n",
    "        img = cv2.imread(fn)\n",
    "        # cv2.imread returns None rather than raising for a missing or unreadable file\n",
    "        if img is None:\n",
    "            raise FileNotFoundError(fn)\n",
    "        # OpenCV loads pixels as BGR, matplotlib expects RGB\n",
    "        img2 = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)\n",
    "        plt.imshow(img2)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##  USTC Example"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import requests\n",
    "from bs4 import BeautifulSoup\n",
    "\n",
    "# Download USTC News on the frontpage\n",
    "url = 'http://www.ustc.edu.cn'\n",
    "bs = BeautifulSoup(requests.get(url).content, \"lxml\")\n",
    "# find() returns None when no element carries this id; the loop below\n",
    "# would then fail with AttributeError.\n",
    "news = bs.find(id=\"wp_news_w1\")\n",
    "#print(news)\n",
    "print(\"USTC News\")\n",
    "for item in news.find_all('a'):\n",
    "    # item[\"title\"] raises KeyError for an <a> tag that has no title attribute\n",
    "    print(item[\"title\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Seems there is something wrong here\n",
    "\n",
    "# Look up the element with id \"wp_news_w2\" in the page parsed into bs\n",
    "notes = bs.find(id=\"wp_news_w2\")\n",
    "#print(notes)\n",
    "print(\"USTC Notifications\")\n",
    "# Print the title attribute of every link inside that element\n",
    "for item in notes.find_all('a'):\n",
    "    print(item[\"title\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Practice\n",
    "# Download all the headline news from ifeng.com/news.sina.com.cn"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Get all the cities"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import requests\n",
    "from bs4 import BeautifulSoup\n",
    "\n",
    "url = 'http://www.pm25.in'\n",
    "bs = BeautifulSoup(requests.get(url).content, \"lxml\")\n",
    "# The <div class=\"all\"> block is the one whose links are collected below\n",
    "alphabet = bs.find(name = 'div', attrs={\"class\":'all'})\n",
    "#print(alphabet)\n",
    "cities = dict()\n",
    "for item in alphabet.find_all('a'):\n",
    "    en_name = item['href'][1:]   # href without its first character\n",
    "    zh_name = item.contents[0]   # first child node of the link\n",
    "    print(en_name, zh_name)\n",
    "    cities[en_name] = zh_name\n",
    "    # why [1:] instead of all?\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Flask"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#https://dormousehole.readthedocs.io/en/latest/quickstart.html"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# For windows cmd\n",
    "# set FLASK_APP=flask_filename.py\n",
    "# flask run\n",
    "# For other platforms, go to quickstart.html\n",
    "\n",
    "# try flask_test.py\n",
    "# try application.py"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Sqlite3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sqlite3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Connect a database and create a table\n",
    "# (sqlite3.connect creates cities.db if the file does not exist yet)\n",
    "conn = sqlite3.connect('cities.db')\n",
    "print(\"Opened database successfully\")\n",
    "c = conn.cursor()\n",
    "# IF NOT EXISTS keeps this cell re-runnable: a plain CREATE TABLE raises\n",
    "# sqlite3.OperationalError once cities.db already contains the table.\n",
    "c.execute('''CREATE TABLE IF NOT EXISTS CITIES\n",
    "       (ID INT PRIMARY KEY     NOT NULL,\n",
    "       EN_NAME        TEXT    NOT NULL,\n",
    "       ZH_NAME        TEXT    NOT NULL);''')\n",
    "print(\"Cities Table Created!\")\n",
    "conn.commit()\n",
    "conn.close()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Show tables\n",
    "conn = sqlite3.connect('cities.db')\n",
    "c = conn.cursor()\n",
    "# sqlite_master is SQLite's built-in catalog; type='table' narrows it to tables\n",
    "c.execute(\"select name from sqlite_master where type='table' order by name;\")\n",
    "print(c.fetchall())\n",
    "# The connection is not closed in this cell: the next cell keeps using c and conn\n",
    "conn.commit()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Show table structure\n",
    "# Relies on the cursor c and connection conn opened by the previous cell.\n",
    "# PRAGMA table_info returns one row per column of the named table.\n",
    "c.execute(\"PRAGMA table_info(CITIES)\")\n",
    "print(c.fetchall())\n",
    "conn.commit()\n",
    "conn.close()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Delete items\n",
    "conn = sqlite3.connect('cities.db')\n",
    "c = conn.cursor()\n",
    "# A DELETE without a WHERE clause removes every row but keeps the table itself\n",
    "c.execute(\"delete from CITIES;\")\n",
    "conn.commit()\n",
    "conn.close()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Insert data\n",
    "conn = sqlite3.connect('cities.db')\n",
    "c = conn.cursor()\n",
    "\n",
    "# make sure you already have the dict `cities` in memory\n",
    "# The \"?\" placeholders let sqlite3 bind the values itself, so a name that\n",
    "# contains a quote can neither break the statement nor inject SQL.\n",
    "sql = \"INSERT INTO CITIES (ID,EN_NAME,ZH_NAME) VALUES (?, ?, ?);\"\n",
    "for i, k in enumerate(cities):\n",
    "    # str() turns the BeautifulSoup text nodes into plain strings before binding\n",
    "    row = (i, str(k), str(cities[k]))\n",
    "    print(row)\n",
    "    c.execute(sql, row)\n",
    "\n",
    "conn.commit()\n",
    "print(\"Records created successfully\")\n",
    "conn.close()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# show data\n",
    "conn = sqlite3.connect('cities.db')\n",
    "c = conn.cursor()\n",
    "# execute() returns the cursor itself, so the fetch can be chained onto it\n",
    "data = c.execute(\"select * from CITIES;\").fetchall()\n",
    "conn.commit()\n",
    "conn.close()\n",
    "\n",
    "# Each row comes back as a tuple\n",
    "for item in data:\n",
    "    print(item)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# use where\n",
    "conn = sqlite3.connect('cities.db')\n",
    "c = conn.cursor()\n",
    "# Keep only the rows whose EN_NAME is 'hefei'; execute() returns the cursor,\n",
    "# so the fetch can be chained onto it\n",
    "data = c.execute(\"select * from CITIES where EN_NAME='hefei';\").fetchall()\n",
    "conn.commit()\n",
    "conn.close()\n",
    "\n",
    "for item in data:\n",
    "    print(item)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
